
**********
//Analytic code used to generate findings presented in the manuscript: "Incentivizing adherence to gender-affirming PrEP programs: a stated preference discrete choice experiment among transgender and gender nonbinary adults"
//Date analysis performed: 05.21.2024; 12.21.29; 11.29.23; 11/16/23; 11/1/23; 10/30/2023; 10/17/23, 10/11/23
//Author: Marta Wilson-Barthes
**********
//Format of Incentive/: Voucher 	Electronic cash card	Nothing (opt-out)
//Conditionality: Blood test	Blood and Hair test	Nothing (opt-out)
//Hormone Co-prescription: Oral hormones	Injectable hormones	Nothing (opt-out)
//Counseling: In-person	Online	Nothing (opt-out)
//Amount of Incentive: Medium ($100)	High ($200)	$0 (opt-out)
**********

clear all
capture log close
macro drop _all
set more off, permanently
set linesize 255

//set global directories 
pwd
global folder "P:\Omar Projects\DCE Seattle (PI Arjee Restar)\PATH Study DCE A and B Datasets 2023.08.22"
global sourcedir "$folder\Source"
global outputdir "$folder\Output"
global codedir "$folder\Code"

log using "$outputdir/PATHDCEA.log", replace

//change directory to source folder
cd "$sourcedir"	
pwd

********************************

*Read in the data for DCE A
use "$sourcedir\PATH_DCE_SETA.dta", clear


******************* data formatting 
label var ALT "Program Alternatives"
	capture label define altl 0"0. No Program" 1"1. Program A" 2"2. Program B" 
	label values ALT altl

label var CHOICE1 "First program choice"
	capture label define c1 0"0. Not Chosen" 1"1. Chosen" 
	label values CHOICE1 c1

label var CHOICE2 "Second program choice(of remaining programs)"
	capture label define c2 0"0. Chosen" 1"1. Not Chosen" 
	label values CHOICE2 c2

*format data set like iSAY dataset
gen format0	= 1 if FORMAT==0
replace format0 = 0 if FORMAT!=0
	
gen format1	= 1 if FORMAT==1
replace format1 = 0 if FORMAT!=1

gen format2	= 1 if FORMAT==2
replace format2 = 0 if FORMAT!=2

gen cond0 = 1 if CONDITION==0
replace cond0 = 0 if CONDITION!=0
	
gen cond1	= 1 if CONDITION==1
replace cond1 = 0 if CONDITION!=1

gen cond2	= 1 if CONDITION==2
replace cond2 = 0 if CONDITION!=2

gen horm0	= 1 if HORMONE==0
replace horm0 = 0 if HORMONE!=0
	
gen horm1	= 1 if HORMONE==1
replace horm1 = 0 if HORMONE!=1

gen horm2	= 1 if HORMONE==2
replace horm2 = 0 if HORMONE!=2

gen counsel0 = 1 if COUNSEL==0
replace counsel0 = 0 if COUNSEL!=0
	
gen counsel1 = 1 if COUNSEL==1
replace counsel1 = 0 if COUNSEL!=1

gen counsel2 = 1 if COUNSEL==2
replace counsel2 = 0 if COUNSEL!=2

gen amount0 = 1 if AMOUNT==0
replace amount0 = 0 if AMOUNT!=0
	
gen amount1 = 1 if AMOUNT==1
replace amount1 = 0 if AMOUNT!=1

gen amount2 = 1 if AMOUNT==2
replace amount2 = 0 if AMOUNT!=2

*gen year incentive amount, hypothetical program adminsitered every 2 months
gen yramount = AMOUNT*6
gen logyroffer = log(yramount+.00001)
replace logyroffer = 0 if yramount==0

******************* explode dataset to use ordered mix logit
sort ID block TRIAL 
gen byte begin = TRIAL != TRIAL[_n-1]
gen spell = sum(begin)
drop begin
rename spell _caseid

gen _panelaltid = ALT

gen choice1 = CHOICE1
gen choice2 = CHOICE2
***
gen n=0
sort _caseid
by _caseid: replace n=2 if CHOICE1==0
expand n, generate(quest2) 
gen question=1
replace question=2 if quest2==1 //quest2==1 indicates duplicate observations

*create choice3 variable which takes into account first-best and second-best options combined
gen choice3=choice1
replace choice3=1 if choice2==1 & quest2==1 //i.e., combine first choice on question 1 and second-best choice on question 2

*create new set_id_n to account for second-best
capture drop set_id_n 
capture drop set_id
gen str10 set_id = string(ID,"%06.0f") + string(TRIAL,"%02.0f") + string(question,"%02.0f")
destring set_id, gen(set_id_n)
sum set_id_n, d

*CHECK to make sure new set_id_n variable takes account of second-best
sort _caseid quest2

*declare choice models
*create new t2 to account for second-best
gen t = TRIAL
gen t2=t
replace t2=t*2 if question==2
replace t2=(t*2)-1 if question==1
sort ID t2 quest2
gen id=ID
sort id t2 quest2

*declare the data in memory to be choice model data.
cmset id t2 ALT
cmchoiceset

*create binary dummy variables for alternative-specific vars;  Dummy-variable coding (1=first choice, 0=all other choices (second best and last choice combined))
gen format_cash = 1 if FORMAT==2
replace format_cash = 0 if FORMAT!=2
	capture label define frmt 0"Opt Out or Voucher" 1"Electronic cash card"
	label values format_cash frmt
	
gen format_voucher = 1 if FORMAT==1
replace format_voucher = 0 if FORMAT!=1
	capture label define frmtv 0"Opt Out or Cash" 1"Voucher"
	label values format_voucher frmtv
	
gen condition_blood = 1 if CONDITION==1
replace condition_blood = 0 if CONDITION !=1
	capture label define cnd 0"Opt Out or Blood test & hair test " 1"Blood test"
	label values condition_blood cnd
	
gen condition_bloodhair = 1 if CONDITION==2
replace condition_bloodhair = 0 if CONDITION !=2
	capture label define cnd2 0"Opt Out or Blood test only" 1"Blood and hair test"
	label values condition_bloodhair cnd2	

gen hormone_injectable = 1 if HORMONE==2
replace hormone_injectable = 0 if HORMONE !=2
	capture label define hrmn 0"Opt Out or Oral Hormones" 1"Injectable Hormones"
	label values hormone_injectable hrmn
	
gen hormone_oral = 1 if HORMONE==1
replace hormone_oral = 0 if HORMONE !=1
	capture label define hrmn1 0"Opt Out or Injectable Hormones" 1"Oral Hormones"
	label values hormone_oral hrmn1

gen counsel_online = 1 if COUNSEL==2
replace counsel_online = 0 if COUNSEL !=2
	capture label define cnsl 0"Opt Out or In-Person Counseling" 1"Online Counseling"
	label values counsel_online cnsl

gen counsel_iperson = 1 if COUNSEL==1
replace counsel_iperson = 0 if COUNSEL !=1
	capture label define cnsl1 0"Opt Out or Online Counseling" 1"In-person Counseling"
	label values counsel_iperson cnsl1	

gen amount_200 = 1 if AMOUNT==2
replace amount_200 = 0 if AMOUNT!=2
	capture label define amt 0"Opt Out or $100 Incentive" 1"$200 Incentive"
	label values amount_200 amt

gen amount_100 = 1 if AMOUNT==1
replace amount_100 = 0 if AMOUNT!=1
	capture label define amt1 0"Opt Out or $200 Incentive" 1"$100 Incentive"
	label values amount_100 amt1

rename yramount yramount100s

*Define list of explanatory alternative-specific variables - which vary across both cases and alternatives
global xlist1 yramount100s format_cash format_voucher condition_blood hormone_injectable hormone_oral counsel_online counsel_iperson amount_200

*table
labsumm $xlist1 if t2==1 & choice1==1

**Create list of explanatory case-specific variables - which vary across only cases
	*race var
	gen race = . 
	replace race = 1 if race_1==1 //Asian/Asian American
	replace race = 2 if race_2==1 //Black/African American
	replace race = 3 if race_3==1 //Hispanic, Latino, Spanish origin
	replace race = 4 if race_4==1 //Middle Eastern/North African
	replace race = 5 if race_5==1 //Native Hawaiian/PI
	replace race = 6 if race_6==1 //White
	replace race = 7 if race_7==1 //American Indian
	replace race = 8 if race_8==1 //Other
		capture label define rc 1"Asian/Asian American" 2"Black/AA" 3"Hispanic, Latino, Spanish origin" 4"Middle Eastern/North African" 5"Native Hawaiian/PI" 6"White" 7"American Indian" 8"Other"
		label values race rc

	*age squared
	gen age2=age^2

global xlist2 age age2 genderid birthsex race education income hinsure everprep eversexwork livegeo

*table 1
labsumm $xlist2 if t2==1 & choice1==1

**gen table 1 descriptives
preserve
bysort ID: keep if _n==1
table1_mc, /// 
			vars(		genderid cate \ 		    ///
						birthsex cate \ 			///
						age contn %4.2f \  			///
						race cate \ 				///
						hinsure cate \ 				///
						everprep cate\ 				///
						eversexwork cate \ 			///
						livegeo cate \ 				///
						education cate \ 			///
						income cate )  nospace percent_n onecol missing test total(before) 	saving("$sourcedir\table 1.xlsx", replace)			
restore


******Preference weights for the conditional logit model
capture drop coefplotformat.gph coefplotamount.gph coefplotcondition.gph coefplothormone.gph coefplotcounsel.gph
clogit CHOICE1 i.FORMAT, group(set_id_n)
estimates store clogit1
coefplot, keep(*:) omitted baselevels vertical nooffsets xline(0) citop barwidt(0.5) color(*.6) recast(bar) ciopts(recast(rcap))  xtitle("Amounts") ytitle("Preference Weights") ylabel(, labsize(vsmall)) ///
saving(coefplotformat)

clogit CHOICE1 i.CONDITION, group(set_id_n)
estimates store clogit2
coefplot, keep(*:) omitted baselevels vertical nooffsets xline(0) citop barwidt(0.5) color(*.6) recast(bar) ciopts(recast(rcap))  xtitle("Payment Format") ytitle("Preference Weights") ylabel(, labsize(vsmall)) ///
saving(coefplotcondition)

clogit CHOICE1 i.HORMONE, group(set_id_n)
estimates store clogit3
coefplot, keep(*:) omitted baselevels vertical nooffsets xline(0) citop barwidt(0.5) color(*.6) recast(bar) ciopts(recast(rcap))  xtitle("Payment Type") ytitle("Preference Weights") ylabel(, labsize(vsmall)) ///
saving(coefplothormone)

clogit CHOICE1 i.COUNSEL, group(set_id_n)
estimates store clogit4
coefplot, keep(*:) omitted baselevels vertical nooffsets xline(0) citop barwidt(0.5) color(*.6) recast(bar) ciopts(recast(rcap))  xtitle("Test Type") ytitle("Preference Weights") ylabel(, labsize(vsmall)) ///
saving(coefplotcounsel)

clogit CHOICE1 i.AMOUNT, group(set_id_n)
estimates store clogit5
coefplot, keep(*:) omitted baselevels vertical nooffsets xline(0) citop barwidt(0.5) color(*.6) recast(bar) ciopts(recast(rcap))  xtitle("Test Type") ytitle("Preference Weights") ylabel(, labsize(vsmall)) ///
saving(coefplotamount)

*combine graphs of mean preference weights 
graph combine coefplotformat.gph coefplotamount.gph coefplotcondition.gph coefplothormone.gph coefplotcounsel.gph

global xlist2 age age2 genderid birthsex race education income hinsure everprep eversexwork livegeo

***Estimate MNL model [conditional logit regression, CLR, McFadden's choice model] using yearly offer in US100s
cmclogit choice3 $xlist1, casevars($xlist2) vce(cluster ID) basealternative(0)
estimates store CLRa
estat ic
outreg2 using tab2.doc, addstat(Log-likelihood,`e(ll)', parameters, `e(k)') replace ctitle(CLRa)
test genderid
test age //p=0.09
test everprep //p=0.000
test hinsure //p=0.000
margins, at(age=(18(10)56))
marginsplot, recast(line) ciopts(recast(rarea) color(%20)) //warning: variance matrix is nonsymmetric or highly singular.

//"is most likely
//> due to one or more sparse indicator variables.  By sparse indicator, I
//> mean a variable that takes on the values 0 or 1 (or missing) and is 1
//> for a very small proportion of the observations. The best example is
//> an indicator variable that identifies 1 observation; this will
//> invalidate the large sample theory that the robust variance estimator
//> depends upon for the coefficient on this variable -- however, you can
//> simply remove this variable from the model to solve the problem."//

//take out the covariates that have 100 observations or fewer for a given category:
global xlist2 age age2 birthsex income education everprep eversexwork livegeo

***Estimate MNL model [conditional logit regression, CLR, McFadden's choice model] using yearly offer in US100s
cmclogit choice3 $xlist1, casevars($xlist2) vce(cluster ID) basealternative(0)
margins, at(age=(18(10)60))
marginsplot, recast(line) ciopts(recast(rarea) color(%20)) 


*check effect of excluding case variables
cmclogit choice3 $xlist1, casevars(age) vce(cluster ID) basealternative(0)
estimates store CLRb
estat ic
outreg2 using tab2.doc, addstat(Log-likelihood,`e(ll)', parameters, `e(k)') append ctitle(CLRb)



cmclogit choice3 $xlist1, casevars(age) vce(cluster ID) 
margins, at(age=(18(10)58))
marginsplot, recast(line) ciopts(recast(rarea) color(%20)) //<-- this is the figure presented in the paper
graph save "Graph" "P:\Omar Projects\DCE Seattle (PI Arjee Restar)\PATH Study DCE A and B Datasets 2023.08.22\Output\Graph_PM_age.gph", replace


cmclogit choice3 $xlist1, casevars(income) vce(cluster ID) 
margins, at(income=(1(1)8))
marginsplot, recast(line) ciopts(recast(rarea) color(%20)) 
graph save "Graph" "P:\Omar Projects\DCE Seattle (PI Arjee Restar)\PATH Study DCE A and B Datasets 2023.08.22\Output\Graph_PM_income.gph", replace

ssc install grc1leg
grc1leg Graph_PM_age.gph Graph_PM_income.gph, legendfrom(Graph_PM_age.gph)

***Estimate MROL model with normally distributed intercept
global xlist2 age race_1 race_2 race_3 race_4 race_5 race_6 race_7 race_8 i.genderid i.birthsex i.education i.income i.everprep i.eversexwork i.livegeo
cmxtmixlogit choice3 yramount100s, rand(format_cash condition_blood hormone_injectable counsel_online) casevars($xlist2) vce(cluster ID) collinear intpoints(5) favor(speed) basealternative(0)
estimates store MROLa
estat ic
outreg2 using app3.doc, addstat(Log-likelihood, `e(ll)', parameters, `e(k)') replace ctitle(MROLa)



***Estimate MROL model without clustering
mixlogit choice3 yramount100s, group(set_id_n) rand(format_cash condition_blood hormone_injectable counsel_online) id(ID) robust
estimates store MROLb
outreg2 using app3.doc, addstat(Log-likelihood, `e(ll)', parameters, `e(k)') replace ctitle(MROLb)
estat ic

***Estimate the scale heterogeneity (SH) model [differences in the error variance of choices]
global xlist1 yramount100s format_cash condition_blood hormone_injectable counsel_online 
gmnl choice3 $xlist1, group(set_id_n) id(ID) nrep(1000) seed(12345)
estimates store GMNL
outreg2 using tab3.doc, addstat(Log-likelihood,`e(ll)', parameters, `e(k)') append ctitle(GMNL)
estat ic


************** Calculate WTP estimates 
estimates restore CLRa
nlcom (format:-_b[format_cash]/_b[yramount100s]) ///
(Counsel:-_b[counsel_online]/_b[yramount100s]) ///
(hormone:-_b[hormone_injectable]/_b[yramount100s]) ///
(condition:-_b[condition_blood]/_b[yramount100s]) 

estimates restore MROLa
nlcom (format_cash:-_b[format_cash]/_b[yramount100s]) ///
(Counsel_online:-_b[counsel_online]/_b[yramount100s]) ///
(hormone_inject:-_b[hormone_injectable]/_b[yramount100s]) ///
(condition_blood:-_b[condition_blood]/_b[yramount100s]) 

estimates restore MROLb
nlcom (format_cash:-_b[format_cash]/_b[yramount100s]) ///
(Counsel_online:-_b[counsel_online]/_b[yramount100s]) ///
(hormone_inject:-_b[hormone_injectable]/_b[yramount100s]) ///
(condition_blood:-_b[condition_blood]/_b[yramount100s]) 

estimates restore GMNL
nlcom (format:-_b[format_cash]/_b[yramount100s]) ///
(Counsel:-_b[counsel_online]/_b[yramount100s]) ///
(hormone:-_b[hormone_injectable]/_b[yramount100s]) ///
(condition:-_b[condition_blood]/_b[yramount100s]) 



********Robustness tests

*check for orthogonality
global xlist2 age race_1 race_2 race_3 race_4 race_5 race_6 race_7 race_8 genderid birthsex education income everprep eversexwork livegeo
*using Pearson's correlation coefficient (Pearson's Product of Moments, PPM)
pwcorr $xlist1 $xlist2, print(.20) star(.01)
pwcorr $xlist1 $xlist2, star(.01)

pwcorr $xlist1 $xlist2
matrix A = r(C)
matrix C = J(16, 16, 0)
forvalues i=1/16 {
    forvalues j=1/16 {
        matrix C[`i',`j']= round(A[`i',`j'], 0.001)    
    }
}
matrix list C

estpost correlate $xlist1 $xlist2, matrix listwise
est store c1
esttab * using corrmatr.rtf, unstack not noobs compress replace

estpost correlate $xlist1 $xlist2, matrix listwise
est store c1
esttab * using corr.csv, unstack not noobs compress replace
*all correlations are sufficiently low as not to cause concern

*minimum overlap:
tab1 FORMAT CONDITION HORMONE COUNSEL AMOUNT



********Random Parameters Logit Model for CROI Abstract Figure

mixlogit choice3, group(set_id_n) rand(amount_200 amount_100) id(ID) robust
estimates store amount_pw
scalar beta_200 = _b[amount_200]
scalar beta_100 = _b[amount_100]
gen pw_amount_200 = beta_200 / (beta_200+beta_100)
gen pw_amount_100 = beta_100 / (beta_200+beta_100)

gen amount_0 = 1 if AMOUNT==0
replace amount_0=0 if AMOUNT!=0

mixlogit choice3, group(set_id_n) rand(amount_200 amount_100) id(ID) robust
estimates store amount_pw
scalar beta_200 = _b[amount_200]
scalar beta_100 = _b[amount_100]
capture drop pw_amount_200 pw_amount_100
gen pw_amount_200 = beta_200 / (beta_200+beta_100)
gen pw_amount_100 = beta_100 / (beta_200+beta_100)

mixlogit choice3, group(set_id_n) rand(format_cash format_voucher) id(ID) robust
estimates store format_pw
scalar beta_cash = _b[format_cash]
scalar beta_voucher = _b[format_voucher]
gen pw_cash = beta_cash / (beta_cash+beta_voucher)
gen pw_voucher = beta_voucher/ (beta_cash+beta_voucher)

mixlogit choice3, group(set_id_n) rand(condition_blood condition_bloodhair) id(ID) robust
estimates store condition_pw
scalar beta_bloodhair = _b[condition_bloodhair]
scalar beta_blood = _b[condition_blood]
gen pw_bloodhair = beta_bloodhair / (beta_bloodhair+beta_blood)
gen pw_blood = beta_blood/ (beta_bloodhair+beta_blood)

mixlogit choice3, group(set_id_n) rand(hormone_inject hormone_oral) id(ID) robust
estimates store hormone_pw
scalar beta_inject = _b[hormone_inject]
scalar beta_oral = _b[hormone_oral]
gen pw_inject = beta_inject / (beta_inject+beta_oral)
gen pw_oral = beta_oral/ (beta_inject+beta_oral)

mixlogit choice3, group(set_id_n) rand(counsel_iperson counsel_online) id(ID) robust
estimates store counsel_pw
scalar beta_online = _b[counsel_online]
scalar beta_person = _b[counsel_iperson]
gen pw_online = beta_online / (beta_online+beta_person)
gen pw_person = beta_person/ (beta_online+beta_person)

estimates restore counsel_pw
coefplot,  vertical   xlab(, angle(90))  ciopts(recast(rcap)) recast(connected)
estimates restore hormone_pw
coefplot,  vertical   xlab(, angle(90))  ciopts(recast(rcap)) recast(connected)
estimates restore format_pw
coefplot,  vertical   xlab(, angle(90))  ciopts(recast(rcap)) recast(connected)
estimates restore condition_pw
coefplot,  vertical   xlab(, angle(90))  ciopts(recast(rcap)) recast(connected)
estimates restore amount_pw
coefplot,  vertical   xlab(, angle(90))  ciopts(recast(rcap)) recast(connected)

gr combine Graph_hormone.gph Graph_format.gph Graph_counsel.gph Graph_condition.gph Graph_amount.gph



clear 
log close


